from PIL import Image
import pytesseract
from typing import List
import re

class OCRHelper:
    """Helpers for pulling English-letter words out of images via Tesseract."""

    @staticmethod
    def extract_words_from_image(image_path: str) -> List[str]:
        """Run OCR on an image file and return the words found, lowercased.

        The image is opened with Pillow, passed to
        ``pytesseract.image_to_string`` with no extra arguments, and the
        recognized text is scanned for runs of ASCII letters delimited by
        word boundaries. Tokens containing digits, underscores or non-ASCII
        word characters are therefore not returned.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The matched words in order of appearance, each lowercased.
            An empty list is returned when nothing matches or when any
            error occurs while opening or recognizing the image.
        """
        try:
            # Open the image in a context manager so its underlying file
            # handle is released as soon as OCR finishes, instead of
            # lingering until garbage collection.
            with Image.open(image_path) as image:
                text = pytesseract.image_to_string(image)

            # Keep only standalone runs of ASCII letters.
            words = re.findall(r'\b[a-zA-Z]+\b', text)

            # Normalize case so callers can compare words directly.
            return [word.lower() for word in words]

        except Exception as e:
            # Best-effort contract: report the problem and hand back an
            # empty result rather than propagating the failure to callers.
            print(f"Error processing image: {e}")
            return []